import plotly
import pandas as pd
df =pd.read_csv(r"C:\Users\limit\Downloads\evdataset.csv.csv")# loading the dataset
df
| VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | JTMEB3FV6N | Monroe | Key West | FL | 33040 | 2022 | TOYOTA | RAV4 PRIME | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 42 | 0 | NaN | 198968248 | POINT (-81.80023 24.5545) | NaN | 12087972100 |
| 1 | 1G1RD6E45D | Clark | Laughlin | NV | 89029 | 2013 | CHEVROLET | VOLT | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | NaN | 5204412 | POINT (-114.57245 35.16815) | NaN | 32003005702 |
| 2 | JN1AZ0CP8B | Yakima | Yakima | WA | 98901 | 2011 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 73 | 0 | 15.0 | 218972519 | POINT (-120.50721 46.60448) | PACIFICORP | 53077001602 |
| 3 | 1G1FW6S08H | Skagit | Concrete | WA | 98237 | 2017 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 238 | 0 | 39.0 | 186750406 | POINT (-121.7515 48.53892) | PUGET SOUND ENERGY INC | 53057951101 |
| 4 | 3FA6P0SU1K | Snohomish | Everett | WA | 98201 | 2019 | FORD | FUSION | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 38.0 | 2006714 | POINT (-122.20596 47.97659) | PUGET SOUND ENERGY INC | 53061041500 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 112629 | 7SAYGDEF2N | King | Duvall | WA | 98019 | 2022 | TESLA | MODEL Y | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | 0 | 0 | 45.0 | 217955265 | POINT (-121.98609 47.74068) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032401 |
| 112630 | 1N4BZ1CP7K | San Juan | Friday Harbor | WA | 98250 | 2019 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 150 | 0 | 40.0 | 103663227 | POINT (-123.01648 48.53448) | BONNEVILLE POWER ADMINISTRATION||ORCAS POWER &... | 53055960301 |
| 112631 | 1FMCU0KZ4N | King | Vashon | WA | 98070 | 2022 | FORD | ESCAPE | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | 34.0 | 193878387 | POINT (-122.4573 47.44929) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033027702 |
| 112632 | KNDCD3LD4J | King | Covington | WA | 98042 | 2018 | KIA | NIRO | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 47.0 | 125039043 | POINT (-122.09124 47.33778) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032007 |
| 112633 | YV4BR0CL8N | King | Covington | WA | 98042 | 2022 | VOLVO | XC90 | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 18 | 0 | 47.0 | 194673692 | POINT (-122.09124 47.33778) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032005 |
112634 rows × 17 columns
#describing the data
df.describe()
| Postal Code | Model Year | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | 2020 Census Tract | |
|---|---|---|---|---|---|---|---|
| count | 112634.000000 | 112634.000000 | 112634.000000 | 112634.000000 | 112348.000000 | 1.126340e+05 | 1.126340e+05 |
| mean | 98156.226850 | 2019.003365 | 87.812987 | 1793.439681 | 29.805604 | 1.994567e+08 | 5.296650e+10 |
| std | 2648.733064 | 2.892364 | 102.334216 | 10783.753486 | 14.700545 | 9.398427e+07 | 1.699104e+09 |
| min | 1730.000000 | 1997.000000 | 0.000000 | 0.000000 | 1.000000 | 4.777000e+03 | 1.101001e+09 |
| 25% | 98052.000000 | 2017.000000 | 0.000000 | 0.000000 | 18.000000 | 1.484142e+08 | 5.303301e+10 |
| 50% | 98119.000000 | 2020.000000 | 32.000000 | 0.000000 | 34.000000 | 1.923896e+08 | 5.303303e+10 |
| 75% | 98370.000000 | 2022.000000 | 208.000000 | 0.000000 | 43.000000 | 2.191899e+08 | 5.305307e+10 |
| max | 99701.000000 | 2023.000000 | 337.000000 | 845000.000000 | 49.000000 | 4.792548e+08 | 5.603300e+10 |
#checking the data for dtype, non null
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 112634 entries, 0 to 112633 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 VIN (1-10) 112634 non-null object 1 County 112634 non-null object 2 City 112634 non-null object 3 State 112634 non-null object 4 Postal Code 112634 non-null int64 5 Model Year 112634 non-null int64 6 Make 112634 non-null object 7 Model 112614 non-null object 8 Electric Vehicle Type 112634 non-null object 9 Clean Alternative Fuel Vehicle (CAFV) Eligibility 112634 non-null object 10 Electric Range 112634 non-null int64 11 Base MSRP 112634 non-null int64 12 Legislative District 112348 non-null float64 13 DOL Vehicle ID 112634 non-null int64 14 Vehicle Location 112610 non-null object 15 Electric Utility 112191 non-null object 16 2020 Census Tract 112634 non-null int64 dtypes: float64(1), int64(6), object(10) memory usage: 14.6+ MB
#checking for missing values
df.isnull().sum()
VIN (1-10) 0 County 0 City 0 State 0 Postal Code 0 Model Year 0 Make 0 Model 20 Electric Vehicle Type 0 Clean Alternative Fuel Vehicle (CAFV) Eligibility 0 Electric Range 0 Base MSRP 0 Legislative District 286 DOL Vehicle ID 0 Vehicle Location 24 Electric Utility 443 2020 Census Tract 0 dtype: int64
df["Model"]=df.groupby(["Make"])['Model'].transform(lambda x: x.fillna(x.mode()[0]))
df["Model"].isnull().sum()#checking for the null values in model column
0
df["Legislative District"]=df.groupby(["County","City","State","Postal Code"])["Legislative District"].transform(lambda x: x.fillna(x.mode()[0]))
df["Legislative District"].isnull().sum()#checking weather the missing values are filled or not
df["Electric Utility"]=df.groupby(["Make","Model","Electric Vehicle Type","Clean Alternative Fuel Vehicle (CAFV) Eligibility"])["Electric Utility"].transform(lambda x: x.fillna(x.mode()[0]))
df["Electric Utility"].isnull().sum()#checking for null values in the column Ecelctric Utility
df.dropna(subset=['Vehicle Location'], inplace=True)
df.reset_index(drop=True,inplace=True)
df
#checking the data again for any null values
df.isnull().sum()
df[df.duplicated()]
#calculating the frequency(no of occurance) for column called postal code
df["Postal Code"].value_counts()
#calculating the frequency for column model year
df["Model Year"].value_counts()
#calculating the frequency for column legislative district
df["Legislative District"].value_counts()
df["Legislative District"]=df["Legislative District"].astype("str")
df["Postal Code"]=df["Postal Code"].astype("str")
df["Model Year"]=df["Model Year"].astype("str")
#plotting for the model year (column univariant)
import plotly.express as px
area=df["Model Year"].value_counts().nlargest(n=10)
fig=px.bar(y=area.values,
x=area.index,
orientation="v",
color=area.index,
text=area.values,
color_discrete_sequence=px.colors.qualitative.Bold)
fig.show()
#plotting for the column Make
area=df["Make"].value_counts().nlargest(n=10)
fig=px.bar(y=area.values,
x=area.index,
orientation="v",
color=area.index,
text=area.values,
color_discrete_sequence=px.colors.qualitative.Bold)
fig.show()
#plotting a pie chart for electrical vehicle type
Vechile_type = df['Electric Vehicle Type'].value_counts()
fig = px.pie(Vechile_type, names=Vechile_type.index, values=Vechile_type.values, title='Pie Chart of Electric Vehicle Type')
fig.show()
#plotting the electrical range
fig=px.histogram(df,x="Electric Range",nbins=10)
fig.show()
# plotting a box plot to see any outliers in electric range
fig=px.box(df,x="Electric Range")
fig.show()
#plotting Box Plot of Electric Vehicle Type vs. Electric Range
fig_boxplot = px.box(df, x='Electric Vehicle Type', y='Electric Range', title='Box Plot of Electric Vehicle Type vs. Electric Range')
fig_boxplot.show()
import plotly.graph_objects as go
def create_ev_choropleth_map(df):
# Calculate the count of EV vehicles for each state
ev_count_by_state = df['State'].value_counts().reset_index()
ev_count_by_state.columns = ['State', 'EV Count']
# Create the Choropleth map using plotly.graph_objects
fig_choropleth = go.Figure(data=go.Choropleth(
locations=ev_count_by_state['State'],
z=ev_count_by_state['EV Count'],
locationmode='USA-states',
colorscale='Viridis',
colorbar_title='Number of EV Vehicles',
))
# Set the map title and layout
fig_choropleth.update_layout(
title_text='Choropleth Map of EV Vehicles by State',
geo_scope='usa', # Limit map scope to the USA
)
# Return the Choropleth map figure
return fig_choropleth
# Call the function and display the Choropleth map
fig = create_ev_choropleth_map(df)
fig.show()
import pandas as pd
# Converting the 'Model Year' column to datetime type
df['Model Year'] = pd.to_datetime(df['Model Year'], format='%Y')
# Group by 'Model Year' and 'Make' to get the count of each make for each year
df_grouped = df.groupby(['Model Year', 'Make']).size().reset_index(name='Count')
import bar_chart_race as bcr
# Pivot the DataFrame to have 'Make' as columns and 'Model Year' as index
df_pivot = df_grouped.pivot(index='Model Year', columns='Make', values='Count')
# Fill missing values using forward fill (pad)
df_pivot = df_pivot.fillna(method='pad')
# Create the Racing Bar Plot
bcr.bar_chart_race(
df=df_pivot,
filename='ev_make_racing_bar_plot.mp4', # Output file name for the animation
orientation='h', # Horizontal bars
sort='desc', # Sort the bars in descending order
n_bars=10, # Number of bars to show
fixed_order=False, # Allow bars to change positions
title='EV Make Racing Bar Plot by Year', # Animation title
label_bars=True, # Show the value label on each bar
period_label={'x': 0.99, 'y': 0.25, 'ha': 'right', 'va': 'center'}, # Position of the year label
period_fmt='%Y', # Format of the year label
)